# Load libraries
library(tidyverse)
library(tidyterra)
library(tidync)
library(ggridges)
library(readxl)
library(janitor)
library(lubridate)
library(sdmTMB)
library(ncdf4)
library(patchwork)
library(terra)
library(viridis)
library(devtools)
library(ggsidekick)
theme_set(theme_sleek())
library(crayon)
library(marmap)
library(tidylog)
# Point to wd
home <- here::here()
# Load all custom functions in R/function
# - map-plot [source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")]
# - callCopernicusCovariate
# - extractCovariateAtLocation
for (fun in list.files(paste0(home, "/R/functions"))) {
source(paste(home, "R/functions", fun, sep = "/"))
}Collate larval density data
Explore data
Read and clean data
Old data
# Get the species we do length modelling on and subset density data accordingly
# For other studies, we could explore using more species!
species_with_length <- readr::read_csv(paste0(home, "/data/clean/larval_size.csv")) |>
distinct(species) |>
pull()
# 1992-2008
density_old <- read_excel(paste0(home, "/data/larvea/1992_2010 MIK SWE Alla arter.xlsx"),
sheet = 1,
skip = 20
)
# Clean data!
density_old <- density_old |>
clean_names() |>
mutate(haul_id = paste(year, month, day, haul, sep = "_")) |>
mutate(species = fct_recode(species, "Chirolophis ascanii" = "Chirolophis ascanii")) |>
filter(species %in% species_with_length) |>
drop_na(no_m2) |>
dplyr::select(no_m2, species, haul_id)
# Read trawl data and match in coordinates and get unique hauls
trawl_old <- read_excel(paste0(home, "/data/larvea/1992-2010 MIK SWE Tråldata.xlsx"),
sheet = 1,
skip = 8
) |>
clean_names() |>
# the last two coordinate columns are decimal degrees of haul position
rename(
haul = haul_no,
lat = lat_decim_20,
lon = long_decim_21
) |>
# two rows without info, including year, so I'm dropping these
drop_na(year) |>
mutate(haul_id = paste(year, month, day, haul, sep = "_")) |>
distinct(haul_id, .keep_all = TRUE) |>
dplyr::select(haul_id, lat, lon, year, month, day)
# No we need to add the zero catches. Make a species * haul id
trawl_old <- expand_grid(trawl_old, species = unique(density_old$species)) |>
arrange(haul_id, species)
# Join trawl data to length data. If there's no match, replace NA with 0!
old <- trawl_old |>
left_join(density_old, by = c("haul_id", "species")) |>
mutate(
period = "old",
day = as.numeric(day),
month = as.numeric(month),
no_m2 = replace_na(no_m2, 0)
)New data
# 2008-2022
density_new <- read_excel(paste0(home, "/data/larvea/ELDB(s) bara fisk 2008-2024.xlsx")) |>
clean_names() |>
mutate(species = str_to_sentence(species)) |>
mutate(species = fct_recode(species, "Chirolophis ascanii" = "Chirolophis ascanii")) |>
filter(species %in% species_with_length) |>
summarise(n = n(), .by = c(species, haul_id))
# By haul id and species, summarise (1 row = 1 individual)
# trawl_new <- read_excel(paste0(home, "/data/larvea/ELDB 2008-2024.xlsx")) |>
trawl_new <- read_excel(paste0(home, "/data/larvea/ELDB 2008-2024 - rättade djup.xlsx")) |>
filter(Notes != "ogiltigt!" | is.na(Notes)) |>
clean_names() |>
rename(
lat = start_latitud,
lon = start_longitud
) |>
dplyr::select(year, day, month, haul_id, national_haul_id, lat, lon, depth_lower, flow_int_revs, flow_int_calibr)
# No we need to add the zero catches. Make a species * haul id
trawl_new <- expand_grid(trawl_new, species = unique(density_new$species)) |>
arrange(haul_id, species)
# Join trawl data to length data. If there's no match, replace NA with 0!
# Then make sure to remove species-year combinations where I have no catch at all (also no length obviously)
# Hence, zero catches are only within a year where we at least caught something
new <- trawl_new |>
left_join(density_new, by = c("haul_id", "species")) |>
mutate(
period = "new",
day = as.numeric(day),
month = as.numeric(month),
n = replace_na(n, 0)
) |>
mutate(max = max(n), .by = c(species, year)) |>
filter(max > 0) |>
dplyr::select(-max)
# new |>
# filter(max == 0) |>
# distinct(species, year)
# Calculate density using the same formula as in the old data
new |>
group_by(year) |>
summarise_all(function(x) mean(is.na(x)))# A tibble: 16 × 13
year day month haul_id national_haul_id lat lon depth_lower
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2008 0 0 0 0 0 0 0
2 2009 0 0 0 0 0 0 0
3 2010 0 0 0 0 0 0 0
4 2012 0 0 0 0 0 0 0
5 2013 0 0 0 0 0 0 0
6 2014 0 0 0 0 0 0 0
7 2015 0 0 0 0 0 0 0.0154
8 2016 0 0 0 0 0 0 0
9 2017 0 0 0 0 0 0 0
10 2018 0 0 0 0 0 0 0
11 2019 0 0 0 0 0 0 0
12 2020 0 0 0 0 0 0 0.0227
13 2021 0 0 0 0 0 0 0
14 2022 0 0 0 0 0 0 0
15 2023 0 0 0 0 0 0 0
16 2024 0 0 0 0 0 0 0
# ℹ 5 more variables: flow_int_revs <dbl>, flow_int_calibr <dbl>,
# species <dbl>, n <dbl>, period <dbl>
# Replace NA in flow_int_calibr with the most common value
new |> summarise(n = n(), .by = flow_int_calibr) |> drop_na()# A tibble: 5 × 2
flow_int_calibr n
<dbl> <int>
1 3.33 2894
2 3.33 689
3 3.33 558
4 3.33 637
5 3 12
new <- new |>
filter(flow_int_revs > 0) |>
mutate(
flow_int_calibr = replace_na(flow_int_calibr, 3.33),
eff_flow = flow_int_revs / flow_int_calibr,
no_m2 = (n / (3.14 * eff_flow)) * (depth_lower + 5)
) |>
dplyr::select(-flow_int_revs, -flow_int_calibr, -eff_flow, -depth_lower, -n) |>
mutate(year = as.numeric(year))Join old and new
overlapping_yrs <- intersect(new$year, old$year)
# new |> filter(year %in% overlapping_yrs) |> distinct(haul_id) |> nrow()
# old |> filter(year %in% overlapping_yrs) |> distinct(haul_id) |> nrow()
#
# new |> filter(year == 2009 & day == 4 & month == 2) |> distinct(national_haul_id, haul_id)
# old |> filter(year == 2009 & day == 4 & month == 2) |> distinct(haul_id)
#
# new |> filter(haul_id == "2009SE_151") |> arrange(species)
# new |> filter(national_haul_id == "2009SE_147") |> arrange(species)
# old |> filter(haul_id == "2009_02_04_151") |> arrange(species)
old <- old |> filter(!year %in% c(overlapping_yrs))
d <- bind_rows(old, new) |>
mutate(yday = yday(paste(year, month, day, sep = "-"))) |>
filter(lon > 8) |>
drop_na(lat)
# Add km UTM coords
d <- d |>
add_utm_columns(ll_names = c("lon", "lat"))Explore data
ggplot(d, aes(no_m2)) +
geom_histogram() +
facet_wrap(~species, scales = "free")`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 25 rows containing non-finite outside the scale range
(`stat_bin()`).
plot_map_fc +
geom_point(
data = d,
aes(X * 1000, Y * 1000, color = no_m2),
size = 0.5
) +
facet_wrap(~species) +
scale_color_viridis(trans = "sqrt")plot_map_fc +
geom_point(
data = d |>
mutate(pres = ifelse(no_m2 > 0, "y", "n")),
aes(X * 1000, Y * 1000, color = pres),
size = 0.1
) +
facet_wrap(~species)mutate: new variable 'pres' (character) with 3 unique values and <1% NA
Warning: `guide_colourbar()` needs continuous scales.
Add covariate to hauls
# Specify covariates path for simplicity
covPath <- paste0(home, "/data/covariates")Satellite derived temperatures
https://data.marine.copernicus.eu/product/SST_BAL_SST_L4_REP_OBSERVATIONS_010_016/description
## Load satellite derived SST.
# Source: https://data.marine.copernicus.eu/product/SST_BAL_SST_L4_REP_OBSERVATIONS_010_016/download
# Print details
print(nc_open(paste(covPath, "sst", "DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_1711802008633.nc", sep = "/")))File /Users/maxlindmark/Dropbox/Max work/R/larval-sizes/data/covariates/sst/DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_1711802008633.nc (NC_FORMAT_NETCDF4):
1 variables (excluding dimension variables):
float analysed_sst[longitude,latitude,time] (Contiguous storage)
units: kelvin
_FillValue: NaN
standard_name: sea_surface_foundation_temperature
long_name: Analysed sea surface temperature
3 dimensions:
latitude Size:355
standard_name: latitude
long_name: Latitude
units: degrees_north
unit_long: Degrees North
axis: Y
valid_min: 48
valid_max: 66
longitude Size:342
standard_name: longitude
long_name: Longitude
units: degrees_east
unit_long: Degrees East
axis: X
time Size:1462
standard_name: time
long_name: Time
units: seconds since 1970-01-01 00:00:00
calendar: gregorian
axis: T
11 global attributes:
Conventions: CF-1.11
title: Baltic Sea SST analysis, daily reprocessed level 4 analysis
institution: Danish Meteorological Institute, DMI
source: ESA SST CCI, C3S and CMEMS FMI and SMHI Sea ice concentration
history: Version 1.0
references: Høyer, J. L. and She, J., Optimal interpolation of sea surface temperature for the North Sea and Baltic Sea, J. Mar. Sys., Vol 65, 1-4, pp. 176-189, 2007, Høyer, J. L., Le Borgne, P., & Eastwood, S. (2014). A bias correction method for Arctic satellite sea surface temperature observations. Remote Sensing of Environment, 146, 201-213.
comment: IN NO EVENT SHALL DMI OR ITS REPRESENTATIVES BE LIABLE FOR ANY DAMAGES WHATSOEVER INCLUDING, WITHOUT LIMITATION, SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES OR DAMAGES FOR LOSS OF BUSINESS PROFITS OR SAVINGS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION OR OTHER PECUNIARY LOSS ARISING OUT OF THE USE OF OR THE BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION OR OTHER PECUNIARY LOSS ARISING OUT OF THE USE OF OR THE INABILITY TO USE THIS DMI PRODUCT, EVEN IF DMI HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION SHALL APPLY TO CLAIMS OF PERSONAL INJURY TO THE EXTENT PERMITTED BY LAW. SOME COUNTRIES OR STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF LIABILITY FOR CONSEQUENTIAL, SPECIAL, INDIRECT, INCIDENTAL DAMAGES AND, ACCORDINGLY, SOME PORTIONS OF THESE LIMITATIONS MAY NOT APPLY TO YOU. BY USING THIS PRODUCT, YOU HAVE ACCEPTED THAT THE ABOVE LIMITATIONS OR THE MAXIMUM LEGALLY APPLICABLE SUBSET OF THESE LIMITATIONS APPLY TO YOUR USE OF THIS PRODUCT. WARNING Some applications are unable to properly handle signed bytevalues. If values are encountered > 127, please subtract 256 from this reported value
subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
subset:productId: SST_BAL_SST_L4_REP_OBSERVATIONS_010_016
subset:datasetId: DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_202012
subset:date: 2024-03-30T12:33:28.633Z
# Load and gather the temperature data in a tibble
temp_tibble <- callCopernicusCovariate("sst", messages = 1)Processing - Gathering data from df 1/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 2/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 3/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 4/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 5/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 6/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 7/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 8/8.
mutate: new variable 'date' (double) with 1,521 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 69,342,258 rows (90%), 7,868,265 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 253,815 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Note - The data were filtered internally for January month
Completed
# Visualize temperature frequency distribution
hist(temp_tibble$sst)# Visualize temperature spatial distribution
# plot_map +
# geom_point(data = temp_tibble,
# aes(X*1000, Y*1000, color = sst))
# Obtain temporal availability, this will be the temporal window to filter the data
unique(temp_tibble$year) [1] 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005
[16] 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
[31] 2021 2022 2023
# Trim years we have temperature for (again, annoying! Fix the temperatures later)
d <- d |>
filter(year %in% unique(temp_tibble$year))filter: removed 416 rows (2%), 19,891 rows remaining
# Loop through all year combos, extract the temperatures at the data locations
d <- extractCovariateAtLocation(
"sst", # Name of the covariate to extract. One of: sst, chlorophyll, depth.
d, # A df containing the set of yearand locations to be evaluated.
temp_tibble, # A df containing the covariate at location
changesYearly = 1, # Is the covariate time variant (e.g. temp) or not (e.g. depth)
"temp", # Name to give to the covariate evaluated at location in the df
messages = 1 # dichotomous
)Processing - Gathering covariate information at location for year 1/31:1992.
filter: removed 19,111 rows (96%), 780 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 2/31:1993.
filter: removed 19,291 rows (97%), 600 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 3/31:1994.
filter: removed 19,399 rows (98%), 492 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 4/31:1995.
filter: removed 19,327 rows (97%), 564 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 5/31:1996.
filter: removed 19,363 rows (97%), 528 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 6/31:1997.
filter: removed 19,447 rows (98%), 444 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 7/31:1998.
filter: removed 19,219 rows (97%), 672 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 8/31:1999.
filter: removed 19,159 rows (96%), 732 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 9/31:2000.
filter: removed 19,195 rows (97%), 696 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 10/31:2001.
filter: removed 19,039 rows (96%), 852 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 11/31:2002.
filter: removed 19,195 rows (97%), 696 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 12/31:2003.
filter: removed 19,147 rows (96%), 744 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 13/31:2004.
filter: removed 19,171 rows (96%), 720 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 14/31:2005.
filter: removed 19,399 rows (98%), 492 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 15/31:2006.
filter: removed 19,291 rows (97%), 600 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 16/31:2007.
filter: removed 19,219 rows (97%), 672 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 17/31:2008.
filter: removed 19,363 rows (97%), 528 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 18/31:2009.
filter: removed 19,286 rows (97%), 605 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 19/31:2010.
filter: removed 19,341 rows (97%), 550 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 20/31:2012.
filter: removed 19,195 rows (97%), 696 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 21/31:2013.
filter: removed 19,124 rows (96%), 767 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 22/31:2014.
filter: removed 19,150 rows (96%), 741 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 23/31:2015.
filter: removed 19,111 rows (96%), 780 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 24/31:2016.
filter: removed 19,085 rows (96%), 806 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 25/31:2017.
filter: removed 19,085 rows (96%), 806 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 26/31:2018.
filter: removed 19,291 rows (97%), 600 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 27/31:2019.
filter: removed 19,189 rows (96%), 702 rows remaining
filter: removed 1,638,906 rows (95%), 92,994 rows remaining
Processing - Gathering covariate information at location for year 28/31:2020.
filter: removed 19,332 rows (97%), 559 rows remaining
filter: removed 1,681,137 rows (97%), 50,763 rows remaining
Processing - Gathering covariate information at location for year 29/31:2021.
filter: removed 19,399 rows (98%), 492 rows remaining
filter: removed 1,681,137 rows (97%), 50,763 rows remaining
Processing - Gathering covariate information at location for year 30/31:2022.
filter: removed 19,410 rows (98%), 481 rows remaining
filter: removed 1,681,137 rows (97%), 50,763 rows remaining
Processing - Gathering covariate information at location for year 31/31:2023.
filter: removed 19,397 rows (98%), 494 rows remaining
filter: removed 1,681,137 rows (97%), 50,763 rows remaining
Completed
Satellite derived chlorophyll abundance
https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_BGC_001_029/description
## Load satellite derived chlorophyll
# Source: https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_BGC_001_029/download
# Print details
print(nc_open(paste(covPath, "chlorophyll", "cmems_mod_glo_bgc_my_0.25_P1D-m_1713795613611_01012017_12312022.nc", sep = "/")))File /Users/maxlindmark/Dropbox/Max work/R/larval-sizes/data/covariates/chlorophyll/cmems_mod_glo_bgc_my_0.25_P1D-m_1713795613611_01012017_12312022.nc (NC_FORMAT_NETCDF4):
6 variables (excluding dimension variables):
float chl[longitude,latitude,depth,time] (Contiguous storage)
units: mg m-3
_FillValue: NaN
standard_name: mass_concentration_of_chlorophyll_a_in_sea_water
long_name: Total Chlorophyll
float no3[longitude,latitude,depth,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_nitrate_in_sea_water
long_name: Nitrate
float nppv[longitude,latitude,depth,time] (Contiguous storage)
units: mg m-3 day-1
_FillValue: NaN
standard_name: net_primary_production_of_biomass_expressed_as_carbon_per_unit_volume_in_sea_water
long_name: Total Primary Production of Phyto
float o2[longitude,latitude,depth,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
long_name: Dissolved Oxygen
float po4[longitude,latitude,depth,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_phosphate_in_sea_water
long_name: Phosphate
float si[longitude,latitude,depth,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_silicate_in_sea_water
long_name: Dissolved Silicate
4 dimensions:
depth Size:1
standard_name: depth
long_name: Depth
units: m
unit_long: Meters
axis: Z
positive: down
valid_min: 0.505760014057159
valid_max: 5902.05810546875
latitude Size:13
standard_name: latitude
long_name: Latitude
units: degrees_north
unit_long: Degrees North
axis: Y
valid_min: -80
valid_max: 90
longitude Size:21
standard_name: longitude
long_name: Longitude
units: degrees_east
unit_long: Degrees East
axis: X
time Size:2191
standard_name: time
long_name: Time
units: seconds since 1970-01-01 00:00:00
calendar: gregorian
axis: T
12 global attributes:
Conventions: CF-1.11
title: Daily mean fields for product GLOBAL_REANALYSIS_BIO_001_029
institution: Mercator Ocean
producer: CMEMS - Global Monitoring and Forecasting Centre
source: MERCATOR FREEBIORYS2V4
credit: E.U. Copernicus Marine Service Information (CMEMS)
contact: servicedesk.cmems@mercator-ocean.eu
references: http://marine.copernicus.eu
subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
subset:productId: GLOBAL_MULTIYEAR_BGC_001_029
subset:datasetId: cmems_mod_glo_bgc_my_0.25_P1D-m_202112
subset:date: 2024-04-22T14:20:13.611Z
# Load and gather the temperature data in a tibble
chl_tibble <- callCopernicusCovariate("chlorophyll", messages = 1)Processing - Gathering data from df 1/4.
mutate: new variable 'date' (double) with 2,191 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 6 unique values and 0% NA
filter: removed 276,690 rows (92%), 25,668 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 828 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 2/4.
mutate: new variable 'date' (double) with 2,922 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 8 unique values and 0% NA
filter: removed 369,012 rows (92%), 34,224 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 1,104 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 3/4.
mutate: new variable 'date' (double) with 3,288 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 9 unique values and 0% NA
filter: removed 415,242 rows (92%), 38,502 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 1,242 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 4/4.
mutate: new variable 'date' (double) with 2,556 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 7 unique values and 0% NA
filter: removed 322,782 rows (92%), 29,946 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 966 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Note - The data were filtered internally for January month
Completed
# Visualize chlorophyll frequency distribution
hist(chl_tibble$chl)# Visualize chlorophyll spatial distribution
# plot_map +
# geom_point(data = chl_tibble,
# aes(X*1000, Y*1000, color = chl))
# Obtain temporal availability, this will be the temporal window to filter the data
sort(unique(chl_tibble$year)) [1] 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
[16] 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
# Trim years we have chlorophyll for.
d <- d |>
filter(year %in% unique(chl_tibble$year)) # We loose 13% of the data by including chl.filter: removed 1,274 rows (6%), 18,617 rows remaining
# Loop through all year combos, extract the chl at the data locations
d <- extractCovariateAtLocation(
"chl",
d,
chl_tibble,
changesYearly = 1,
"chl",
messages = 1
)Processing - Gathering covariate information at location for year 1/29:1993.
filter: removed 18,017 rows (97%), 600 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 2/29:1994.
filter: removed 18,125 rows (97%), 492 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 3/29:1995.
filter: removed 18,053 rows (97%), 564 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 4/29:1996.
filter: removed 18,089 rows (97%), 528 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 5/29:1997.
filter: removed 18,173 rows (98%), 444 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 6/29:1998.
filter: removed 17,945 rows (96%), 672 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 7/29:1999.
filter: removed 17,885 rows (96%), 732 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 8/29:2000.
filter: removed 17,921 rows (96%), 696 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 9/29:2001.
filter: removed 17,765 rows (95%), 852 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 10/29:2002.
filter: removed 17,921 rows (96%), 696 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 11/29:2003.
filter: removed 17,873 rows (96%), 744 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 12/29:2004.
filter: removed 17,897 rows (96%), 720 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 13/29:2005.
filter: removed 18,125 rows (97%), 492 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 14/29:2006.
filter: removed 18,017 rows (97%), 600 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 15/29:2007.
filter: removed 17,945 rows (96%), 672 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 16/29:2008.
filter: removed 18,089 rows (97%), 528 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 17/29:2009.
filter: removed 18,012 rows (97%), 605 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 18/29:2010.
filter: removed 18,067 rows (97%), 550 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 19/29:2012.
filter: removed 17,921 rows (96%), 696 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 20/29:2013.
filter: removed 17,850 rows (96%), 767 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 21/29:2014.
filter: removed 17,876 rows (96%), 741 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 22/29:2015.
filter: removed 17,837 rows (96%), 780 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 23/29:2016.
filter: removed 17,811 rows (96%), 806 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 24/29:2017.
filter: removed 17,811 rows (96%), 806 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 25/29:2018.
filter: removed 18,017 rows (97%), 600 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 26/29:2019.
filter: removed 17,915 rows (96%), 702 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 27/29:2020.
filter: removed 18,058 rows (97%), 559 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 28/29:2021.
filter: removed 18,125 rows (97%), 492 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 29/29:2022.
filter: removed 18,136 rows (97%), 481 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Completed
Satellite derived depth
https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_BGC_001_029/description
# Generate a depth box containing the bathymetries.
depth_box <- getNOAA.bathy(min(d$lon) - .1, max(d$lon) + .1, min(d$lat) - .1, max(d$lat) + .1)Querying NOAA database ...
This may take seconds to minutes, depending on grid size
Building bathy matrix ...
# Obtain depth at locations.
d <- cbind(
d,
get.depth(depth_box, x = d$lon, y = d$lat, locator = F)["depth"]
)
## Convert to strictly positive values.
d$depth <- d$depth * (-1)
# Check
plot_map +
geom_point(
data = d,
aes(X * 1000, Y * 1000, color = depth)
)d <- d |> tidylog::filter(depth > 0)filter: removed 12 rows (<1%), 18,605 rows remaining
Check covariates
# Get the proportion of observations not assigned with a covariate value at prior steps
colMeans(is.na(d)) haul_id lat lon year
0.0000000000 0.0000000000 0.0000000000 0.0000000000
month day species no_m2
0.0000000000 0.0000000000 0.0000000000 0.0013437248
period national_haul_id yday X
0.0000000000 0.5101854340 0.0000000000 0.0000000000
Y temp chl depth
0.0000000000 0.0000000000 0.0006449879 0.0000000000
Plot response variables
d |>
summarise(n = n(), .by = species) |>
arrange(desc(n)) species n
1 Ammodytidae 1532
2 Aphia minuta 1532
3 Chirolophis ascanii 1532
4 Clupea harengus 1532
5 Crystallogobius linearis 1532
6 Microstomus kitt 1532
7 Myoxocephalus scorpius 1532
8 Pholis gunnellus 1532
9 Pomatoschistus sp 1532
10 Syngnathus rostellatus 1532
11 Agonus cataphractus 1474
12 Anguilla anguilla 1288
13 Sardina pilchardus 523
# Distribution of data
ggplot(d, aes(no_m2)) +
geom_histogram() +
facet_wrap(~species, scales = "free")# Effect of day of the year
ggplot(d, aes(yday, no_m2)) +
geom_point(size = 0.4, alpha = 0.4) +
geom_smooth(method = "lm") +
facet_wrap(~species, scales = "free")# Effect of year
ggplot(d, aes(year, no_m2)) +
geom_point(size = 0.4, alpha = 0.4) +
geom_smooth(method = "lm") +
facet_wrap(~species, scales = "free")# Effect of temperature
ggplot(d, aes(temp, no_m2)) +
geom_point(size = 0.4, alpha = 0.4) +
geom_smooth(method = "lm") +
# geom_smooth() +
facet_wrap(~species, scales = "free")# Effect of chlorophyll
ggplot(d, aes(chl, no_m2)) +
geom_point(size = 0.4, alpha = 0.4) +
geom_smooth(method = "lm") +
# geom_smooth() +
facet_wrap(~species, scales = "free")Save data
write_csv(d, paste0(home, "/data/clean/larval_density.csv"))